import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Load the labelled messages. Column names are supplied explicitly via
# names=, so every line of the file is read as a data row.
msg = pd.read_csv('data6.csv', names=['message', 'label'])

# Encode the textual sentiment label as an integer class.
label_codes = {'pos': 1, 'neg': 0}
msg['labelnum'] = msg['label'].map(label_codes)

# Features are the raw message text; targets are the numeric labels.
X = msg['message']
Y = msg['labelnum']

# Hold out part of the data for evaluation. No test_size or random_state
# is passed, so the library defaults apply and the split differs per run.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y)
    
# Learn the bag-of-words vocabulary from the training messages only, then
# reuse that same vocabulary to encode the held-out messages.
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)

# get_feature_names() was removed in scikit-learn 1.2; prefer its
# replacement and fall back only on releases that predate it.
try:
    feature_names = count_vect.get_feature_names_out()
except AttributeError:
    feature_names = count_vect.get_feature_names()

# Dense view of the training document-term matrix, printed for inspection.
df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)
print(df)

# Fit a multinomial naive Bayes model on the training term counts and
# classify the held-out messages.
clf = MultinomialNB()
clf.fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# Show each held-out message next to its predicted sentiment label.
for text, label_num in zip(xtest, predicted):
    if label_num == 1:
        sentiment = "pos"
    else:
        sentiment = "neg"
    print('%s->%s' % (text, sentiment))

from sklearn import metrics

# Report evaluation metrics comparing the held-out labels with the
# model's predictions.
print('\nACCURACY:')
print(metrics.accuracy_score(ytest, predicted))
print('\nconfusion_matrix:')
print(metrics.confusion_matrix(ytest, predicted))
print('\nRecall score:')
print(metrics.recall_score(ytest, predicted))
# The header previously used the invalid escape '\p', which printed a
# literal backslash instead of the leading blank line the other headers have.
print('\nprecision score:')
print(metrics.precision_score(ytest, predicted))